# Notebook helper cell: render an HTML/JS snippet that hides all input (code)
# cells by default and exposes a "here" link to toggle their visibility.
# Relies on jQuery ($) being available, as it is in the classic Jupyter
# notebook front end.
from IPython.display import HTML
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')
Driver Analysis is a powerful tool that can help you understand the factors that influence loyalty. Driver Analysis attempts to identify the attributes that are most correlated with loyalty (as measured by NPS), and illustrates areas where you are under (or over) delivering. This information can then be used to prioritize the investment of capital, time, and resources into areas that will yield the highest return in customer loyalty.
A key driver analysis investigates the relationships between potential drivers and customer behavior such as the likelihood of a positive recommendation, overall satisfaction, or propensity to buy a product. It typically uses data collected from a questionnaire, which might ask for a customer’s demographics, their level of satisfaction with various aspects of your company’s services (e.g., whether it was value for money, or whether the customer services department was helpful), as well as their likelihood of recommending your company to others.
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.parser import parse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sb
#import plotly.plotly as py
#import plotly
#import plotly.graph_objs as go
from datetime import datetime
from dateutil.parser import parse
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
import statistics
# Load the survey / call-centre operations dataset.
# NOTE(review): hard-coded absolute Windows path — parameterize or make it
# relative before sharing this notebook.
file = pd.read_excel("C:/Users/abhinav.gaharwar/Downloads/key driver analysis/data.xlsx")
file.head(5)
# --- The commented-out code below is leftover from an earlier (retail NPS)
# --- version of this analysis; the columns it references (Request Date,
# --- Responded on, primary rating, Product Range, ...) are not used by the
# --- code that actually runs in this notebook.
# file.columns = ['primary rating' if col.startswith('Primary Question') else col for col in file.columns]
# file['Request Date']=pd.to_datetime(file['Request Date'],format='%Y-%m-%d %H:%M:%S')
# file['ReqDate'] = file['Request Date'].apply(lambda x : x.date())
# file['Responded on']=pd.to_datetime(file['Responded on'],format='%Y-%m-%d %H:%M:%S')
# file['ResDate'] = file['Responded on'].apply(lambda x : x.date())
# file['Customer Type'] = file['primary rating'].apply(lambda x: "Detractor" if x>=0 and x<=6 else ("Passive" if x>=7 and x<=8 else "Promoter"))
# file['Month']=file['Request Date'].apply(lambda x: x.month_name())
# #file.head(5)
# file_byCType = file[['Customer Type','ResDate']].groupby(['ResDate','Customer Type']).size()
# file_byCType = file_byCType.to_frame()
# file_byCType = file_byCType.reset_index()
# file_byCType.columns = ['ResDate', 'Customer Type', 'Count']
# #file_byCType.head()
# piv = file_byCType.pivot(index='ResDate', columns='Customer Type', values='Count').reset_index()
# piv['Total'] = piv['Detractor'] + piv['Passive'] + piv['Promoter']
# piv['NPS Score'] = round(((piv['Promoter'] - piv['Detractor']) / piv['Total'])*100, 2)
# piv['Detractor_%'] = round((piv['Detractor'] / piv['Total']) * 100, 2)
# piv['Detractor_%'] = [str(i)+'%' for i in piv['Detractor_%']]
# piv['Passive_%'] = round((piv['Passive'] / piv['Total']) * 100, 2)
# piv['Passive_%'] = [str(i)+'%' for i in piv['Passive_%']]
# piv['Promoter_%'] = round((piv['Promoter'] / piv['Total']) * 100, 2)
# piv['Promoter_%'] = [str(i)+'%' for i in piv['Promoter_%']]
# #piv.head(10)
# data=file[['Product Range','Staff Friendliness','Trial Room','Billing Experience','Ambience and Environment','primary rating','ResDate']]
# data['Customer Type'] = data['primary rating'].apply(lambda x: "Detractor" if x>=0 and x<=6 else ("Passive" if x>=7 and x<=8 else "Promoter"))
# Replace missing responses with 0, then split the columns into categorical
# (object-dtype) and numerical groups and report the size of each group.
file = file.fillna(0)
categorical_list = [col for col in file.columns if file[col].dtype == 'object']
numerical_list = [col for col in file.columns if file[col].dtype != 'object']
print('Number of categorical features:', str(len(categorical_list)))
print('Number of numerical features:', str(len(numerical_list)))
Correlation is a statistical technique that can show whether and how strongly pairs of variables are related.
A perfect positive correlation means that the correlation coefficient is exactly one. This implies that as one variable moves, either up or down, the other variable moves in lockstep, in the same direction. A perfect negative correlation means that the two variables move in opposite directions, while a zero correlation implies no linear relationship at all.
Collinearity implies two variables are near perfect linear combinations of one another. Multicollinearity involves more than two variables. In the presence of multicollinearity, regression estimates are unstable and have high standard errors.
# Keep only the numeric call-centre driver metrics plus the target column
# ('NPS Score') for the correlation / driver analysis below.
data=file[['CA%','NPS Score','Resolution','Rep Sat Score','Talk Time','AHT','TSR','FCR %']]
# Leftover from the earlier retail version of this analysis — 'primary rating'
# is not among the columns selected above:
# data['Customer Type'] = data['primary rating'].apply(lambda x: "Detractor" if x>=0 and x<=6 else ("Passive" if x>=7 and x<=8 else "Promoter"))
Collinearity in terms of categorical variables
# Exploratory profiling report (distributions, correlations, missing values)
# for the selected driver columns.
# NOTE(review): the 'pandas_profiling' package was renamed 'ydata-profiling'
# upstream — confirm the pinned version still imports under this name.
import pandas_profiling as pp
pp.ProfileReport(data)
# Pairwise correlation matrix (Pearson by default) of the selected columns.
data.corr()
The key output from driver analysis is a measure of the relative importance of each of the predictor variables in predicting the outcome variable. These importance scores are also known as importance weights.
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.preprocessing import LabelEncoder

# Label-encode every column so the chi2 scorer receives non-negative integers.
# NOTE(review): LabelEncoder assigns arbitrary integer codes per column, so
# chi2 scores on encoded continuous metrics give only a rough importance
# ranking — confirm this is the intended methodology.
data1 = file
my_df = data1.apply(LabelEncoder().fit_transform)
my_df.head()

# Reuse Colnames instead of repeating the column list (it was previously
# defined but never used).
Colnames = ['CA%','Resolution','Rep Sat Score','Talk Time','AHT','TSR','FCR %']
X = my_df[Colnames]        # independent (driver) columns
y = my_df['NPS Score']     # target as a Series — a 1-column DataFrame makes
                           # sklearn emit a column-vector conversion warning

# Apply SelectKBest to score the features against the target with chi2.
bestfeatures = SelectKBest(score_func=chi2, k=5)
fit = bestfeatures.fit(X, y)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X.columns)
# Concatenate so each chi2 score sits next to its column name.
featureScores = pd.concat([dfcolumns, dfscores], axis=1)
featureScores.columns = ['Specs', 'Score']  # naming the dataframe columns
print(featureScores.nlargest(5, 'Score'))   # print 5 best features
# from sklearn.ensemble import ExtraTreesClassifier
# import matplotlib.pyplot as plt
# model = ExtraTreesClassifier()
# model.fit(X,y)
# print(model.feature_importances_) #use inbuilt class feature_importances of tree based classifiers
# #plot graph of feature importances for better visualization
# feat_importances = pd.Series(model.feature_importances_, index=X.columns)
# feat_importances.nlargest(5).plot(kind='barh')
# plt.show()
Performance reflects the average rating attained by agents for each variable, on the basis of their work.
In the order
1)Talk Time
2)AHT
3)CA %
4)TSR
5)FCR %
# Print the mean of each driver column, in the importance order listed above:
# Talk Time, AHT, CA%, TSR, FCR %.
# Fixes: removed a redundant pd.read_excel() whose result was never used
# (df is reassigned from scratch below before any use), and collapsed five
# copy-pasted mean computations into one loop. Printed output is unchanged:
# np.mean over the float-cast values of each my_df column.
# NOTE(review): these are means of the LabelEncoder codes held in my_df, not
# of the raw metric values — confirm that is the intended "performance"
# measure.
for _metric in ['Talk Time', 'AHT', 'CA%', 'TSR', 'FCR %']:
    print(np.mean([float(v) for v in my_df[_metric]]))
# Summary table of the driver analysis: chi2 importance ("Impact on NPS") and
# mean performance score per attribute, hard-coded from the results computed
# above.
attribute_names = ['Talk Time', 'AHT', 'CA %', 'TSR', 'FCR']
nps_impact = [5432.1, 3526.5, 3447.25, 3061.1, 3043.6]
performance_scores = [118, 118.6, 117.7, 56.8, 114.3]
df = pd.DataFrame({
    'Impact on NPS': nps_impact,
    'Attributes': attribute_names,
    'Performance': performance_scores,
})
df
import plotly.express as px

# Bubble chart: attribute performance (x) vs. impact on NPS (y), one colour
# per attribute, bubble size proportional to the impact score.
# Fix: removed the unused `iris = px.data.iris()` call — it downloaded a
# sample dataset that was never referenced.
fig = px.scatter(df, x="Performance", y="Impact on NPS", color="Attributes",
                 size='Impact on NPS', hover_data=['Performance'])
fig.show()
The x-axis represents performance and the y-axis represents impact on NPS.
AHT, CA, and FCR are performing well but have a comparatively low impact on NPS, so less weight is attached to them. Talk Time is the most important factor among the variables, since both its impact on NPS and its performance have a considerable influence.
Improving Talk Time can therefore improve the NPS by a significant amount.